use std::mem::{self, take};

use crate::asm::*;
use crate::asm::x86_64::*;
use crate::codegen::split_patch_point;
use crate::stats::CompileError;
use crate::virtualmem::CodePtr;
use crate::cruby::*;
use crate::backend::lir::*;
use crate::cast::*;
use crate::options::asm_dump;

/// Register type used by this backend: an alias for the x86 register type,
/// so platform-independent code can refer to it simply as `Reg`.
pub type Reg = X86Reg;

/// Build the [`Reg`] that a `MemBase::Reg` register number refers to.
/// The result is always a 64-bit general-purpose register.
pub fn mem_base_reg(reg_no: u8) -> Reg {
    let reg_type = RegType::GP;
    Reg { reg_no, reg_type, num_bits: 64 }
}

// Callee-saved registers
// R13, R12 and RBX are all callee-saved in the x86-64 SysV ABI.
// NOTE(review): going by the names, these presumably hold the control frame
// pointer, the execution context and the VM stack pointer -- confirm against callers.
pub const CFP: Opnd = Opnd::Reg(R13_REG);
pub const EC: Opnd = Opnd::Reg(R12_REG);
pub const SP: Opnd = Opnd::Reg(RBX_REG);

// C argument registers on this platform
// Listed in argument order: index N holds the register for the N-th C argument.
// The array length (6) is also the upper bound on the number of arguments that
// the CCall splitting code accepts.
pub const C_ARG_OPNDS: [Opnd; 6] = [
    Opnd::Reg(RDI_REG),
    Opnd::Reg(RSI_REG),
    Opnd::Reg(RDX_REG),
    Opnd::Reg(RCX_REG),
    Opnd::Reg(R8_REG),
    Opnd::Reg(R9_REG)
];

// C return value register on this platform
// C_RET_REG and C_RET_OPND name the same register (RAX), as a raw register
// and as an operand respectively.
pub const C_RET_REG: Reg = RAX_REG;
pub const C_RET_OPND: Opnd = Opnd::Reg(RAX_REG);
// Native (machine) stack pointer and frame base pointer: RSP and RBP.
pub const NATIVE_STACK_PTR: Opnd = Opnd::Reg(RSP_REG);
pub const NATIVE_BASE_PTR: Opnd = Opnd::Reg(RBP_REG);

impl CodeBlock {
    /// Size, in bytes, of the code that jmp_ptr generates.
    pub fn jmp_ptr_bytes(&self) -> usize {
        const JMP_PTR_BYTES: usize = 5;
        JMP_PTR_BYTES
    }
}

/// Lower a backend `Opnd` into the operand type consumed by the x86 assembler.
///
/// # Panics
///
/// Panics on `Opnd::VReg` (it must have been replaced by register allocation),
/// on `Opnd::None`, and on any other operand shape that has no direct x86 form.
impl From<Opnd> for X86Opnd {
    fn from(opnd: Opnd) -> Self {
        match opnd {
            // Registers carry over one-to-one
            Opnd::Reg(reg) => X86Opnd::Reg(reg),

            // Immediates. A Ruby VALUE is emitted as its raw bits.
            Opnd::Imm(val) => imm_opnd(val),
            Opnd::UImm(val) => uimm_opnd(val),
            Opnd::Value(VALUE(uimm)) => uimm_opnd(uimm as u64),

            // Register-relative memory operand. The base register is always
            // treated as a 64-bit general-purpose register.
            Opnd::Mem(Mem { base: MemBase::Reg(reg_no), num_bits, disp }) => {
                let base = X86Opnd::Reg(X86Reg {
                    reg_type: RegType::GP,
                    num_bits: 64,
                    reg_no,
                });
                mem_opnd(num_bits, base, disp)
            }

            // Virtual registers have to be resolved before lowering
            Opnd::VReg{..} => panic!("VReg operand made it past register allocation"),

            Opnd::None => panic!(
                "Attempted to lower an Opnd::None. This often happens when an out operand was not allocated for an instruction because the output of the instruction was not used. Please ensure you are using the output."
            ),

            // Everything else has no direct x86 representation
            _ => panic!("unsupported x86 operand type: {opnd:?}")
        }
    }
}

/// Also implement going from a reference to an operand for convenience.
impl From<&Opnd> for X86Opnd {
    fn from(opnd: &Opnd) -> Self {
        X86Opnd::from(*opnd)
    }
}

/// List of registers that can be used for register allocation.
/// This has the same number of registers for x86_64 and arm64.
/// SCRATCH0_OPND is excluded.
/// R10 and R11 do not appear here; this file uses them as `SCRATCH1_OPND` and
/// `SCRATCH0_OPND` respectively.
pub const ALLOC_REGS: &[Reg] = &[
    RDI_REG,
    RSI_REG,
    RDX_REG,
    RCX_REG,
    R8_REG,
    R9_REG,
    RAX_REG,
];

/// Special scratch register for intermediate processing. It should be used only by
/// [`Assembler::x86_scratch_split`] or [`Assembler::new_with_scratch_reg`].
const SCRATCH0_OPND: Opnd = Opnd::Reg(R11_REG);
/// Second scratch register (R10). [`Assembler::x86_scratch_split`] uses it when an
/// instruction needs a scratch register for a second operand while the first one
/// already occupies `SCRATCH0_OPND`.
const SCRATCH1_OPND: Opnd = Opnd::Reg(R10_REG);

impl Assembler {
    /// Create an Assembler via `new_with_accept_scratch_reg(true)` and return it
    /// together with the scratch register operand (`SCRATCH0_OPND`).
    pub fn new_with_scratch_reg() -> (Self, Opnd) {
        let asm = Self::new_with_accept_scratch_reg(true);
        (asm, SCRATCH0_OPND)
    }

    /// Report whether `opnd` refers to the scratch register. Only the register
    /// behind `SCRATCH0_OPND` is checked.
    pub fn has_scratch_reg(opnd: Opnd) -> bool {
        let scratch_reg = SCRATCH0_OPND.unwrap_reg();
        Self::has_reg(opnd, scratch_reg)
    }

    /// Return an owned copy of `ALLOC_REGS`, the registers available to the
    /// register allocator on this platform.
    pub fn get_alloc_regs() -> Vec<Reg> {
        Vec::from(ALLOC_REGS)
    }

    /// Return every caller-saved register on this platform, as a fresh vector.
    pub fn get_caller_save_regs() -> Vec<Reg> {
        Vec::from([
            RAX_REG,
            RCX_REG,
            RDX_REG,
            RSI_REG,
            RDI_REG,
            R8_REG,
            R9_REG,
            R10_REG,
            R11_REG,
        ])
    }

    /// Number of bytes by which a call followed by a bare bones [Self::frame_setup]
    /// moves the native SP.
    pub fn frame_size() -> i32 {
        const FRAME_BYTES: i32 = 16;
        FRAME_BYTES
    }

    // These are the callee-saved registers in the x86-64 SysV ABI
    // RBX, RSP, RBP, and R12–R15

    /// Split IR instructions for the x86 platform
    ///
    /// Consumes `self` and returns a new [`Assembler`] holding the rewritten
    /// instruction stream. The rewrite does the following:
    /// - lowers `Opnd::Value` operands (outside of loads) into either a loaded
    ///   register or an `Opnd::UImm`;
    /// - inserts `load`s so that instructions which overwrite an input, or which
    ///   cannot take the given operand combination, get an operand they can use;
    /// - turns `CCall` arguments into a `parallel_mov` onto `C_ARG_OPNDS`;
    /// - merges some instructions with the `mov` that immediately follows them.
    ///
    /// # Panics
    ///
    /// Panics if a `CCall` carries more operands than `C_ARG_OPNDS` has entries.
    fn x86_split(mut self) -> Assembler
    {
        let mut asm_local = Assembler::new_with_asm(&self);
        let asm = &mut asm_local;
        // Take ownership of the live ranges so they can be consulted while
        // `self` is borrowed by the instruction iterator below.
        let live_ranges: Vec<LiveRange> = take(&mut self.live_ranges);
        let mut iterator = self.instruction_iterator();

        while let Some((index, mut insn)) = iterator.next(asm) {
            // Loads keep their Opnd::Value operand as-is; see the guard below.
            let is_load = matches!(insn, Insn::Load { .. } | Insn::LoadInto { .. });
            let mut opnd_iter = insn.opnd_iter_mut();

            while let Some(opnd) = opnd_iter.next() {
                // Lower Opnd::Value to Opnd::VReg or Opnd::UImm
                match opnd {
                    Opnd::Value(value) if !is_load => {
                        // Since mov(mem64, imm32) sign extends, as_i64() makes sure
                        // we split when the extended value is different.
                        *opnd = if !value.special_const_p() || imm_num_bits(value.as_i64()) > 32 {
                            asm.load(*opnd)
                        } else {
                            Opnd::UImm(value.as_u64())
                        }
                    }
                    _ => {},
                };
            }

            // When we split an operand, we can create a new VReg not in `live_ranges`.
            // So when we see a VReg with out-of-range index, it's created from splitting
            // from the loop above and we know it doesn't outlive the current instruction.
            let vreg_outlives_insn = |vreg_idx| {
                live_ranges
                    .get(vreg_idx)
                    .is_some_and(|live_range: &LiveRange| live_range.end() > index)
            };

            // We are replacing instructions here so we know they are already
            // being used. It is okay not to use their output here.
            #[allow(unused_must_use)]
            match &mut insn {
                Insn::Add { left, right, out } |
                Insn::Sub { left, right, out } |
                Insn::Mul { left, right, out } |
                Insn::And { left, right, out } |
                Insn::Or { left, right, out } |
                Insn::Xor { left, right, out } => {
                    // Both merge arms below require that the output's live range ends
                    // at the very next instruction, i.e. the following `mov` is its
                    // last use.
                    match (&left, &right, iterator.peek().map(|(_, insn)| insn)) {
                        // Merge this insn, e.g. `add REG, right -> out`, and `mov REG, out` if possible
                        (Opnd::Reg(_), Opnd::UImm(value), Some(Insn::Mov { dest, src }))
                        if out == src && left == dest && live_ranges[out.vreg_idx()].end() == index + 1 && uimm_num_bits(*value) <= 32 => {
                            *out = *dest;
                            asm.push_insn(insn);
                            iterator.next(asm); // Pop merged Insn::Mov
                        }
                        (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src }))
                        if out == src && live_ranges[out.vreg_idx()].end() == index + 1 && *dest == *left => {
                            *out = *dest;
                            asm.push_insn(insn);
                            iterator.next(asm); // Pop merged Insn::Mov
                        }
                        _ => {
                            // No merge possible: make sure `left` is something the
                            // instruction is allowed to overwrite.
                            match (*left, *right) {
                                (Opnd::Mem(_), Opnd::Mem(_)) => {
                                    *left = asm.load(*left);
                                    *right = asm.load(*right);
                                },
                                (Opnd::Mem(_), Opnd::UImm(_) | Opnd::Imm(_)) => {
                                    *left = asm.load(*left);
                                },
                                // Instruction output whose live range spans beyond this instruction
                                (Opnd::VReg { idx, .. }, _) => {
                                    if vreg_outlives_insn(idx) {
                                        *left = asm.load(*left);
                                    }
                                },
                                // We have to load memory operands to avoid corrupting them
                                (Opnd::Mem(_), _) => {
                                    *left = asm.load(*left);
                                },
                                // We have to load register operands to avoid corrupting them
                                (Opnd::Reg(_), _) => {
                                    if *left != *out {
                                        *left = asm.load(*left);
                                    }
                                },
                                // The first operand can't be an immediate value
                                (Opnd::UImm(_), _) => {
                                    *left = asm.load(*left);
                                }
                                _ => {}
                            }
                            asm.push_insn(insn);
                        }
                    }
                },
                Insn::Cmp { left, right } => {
                    // Replace `cmp REG, 0` (4 bytes) with `test REG, REG` (3 bytes)
                    // when next IR is `je`, `jne`, `csel_e`, or `csel_ne`
                    match (&left, &right, iterator.peek().map(|(_, insn)| insn)) {
                        (Opnd::VReg { .. },
                         Opnd::UImm(0) | Opnd::Imm(0),
                         Some(Insn::Je(_) | Insn::Jne(_) | Insn::CSelE { .. } | Insn::CSelNE { .. })) => {
                            asm.push_insn(Insn::Test { left: *left, right: *left });
                        }
                        _ => {
                            // Split the instruction if `cmp` can't be encoded with given operands
                            match (&left, &right) {
                                // One of the operands should not be a memory operand
                                (Opnd::Mem(_), Opnd::Mem(_)) => {
                                    *right = asm.load(*right);
                                }
                                // The left operand needs to be either a register or a memory operand
                                (Opnd::UImm(_) | Opnd::Imm(_), _) => {
                                    *left = asm.load(*left);
                                }
                                _ => {},
                            }
                            asm.push_insn(insn);
                        }
                    }
                },
                Insn::Test { left, right } => {
                    match (&left, &right) {
                        // At most one operand may be a memory operand
                        (Opnd::Mem(_), Opnd::Mem(_)) => {
                            *right = asm.load(*right);
                        }
                        // The first operand can't be an immediate value
                        (Opnd::UImm(_) | Opnd::Imm(_), _) => {
                            *left = asm.load(*left);
                        }
                        _ => {}
                    }
                    asm.push_insn(insn);
                },
                // These instructions modify their input operand in-place, so we
                // may need to load the input value to preserve it
                Insn::LShift { opnd, .. } |
                Insn::RShift { opnd, .. } |
                Insn::URShift { opnd, .. } => {
                    match opnd {
                        // Instruction output whose live range spans beyond this instruction
                        Opnd::VReg { idx, .. } => {
                            if vreg_outlives_insn(*idx) {
                                *opnd = asm.load(*opnd);
                            }
                        },
                        // We have to load non-reg operands to avoid corrupting them
                        Opnd::Mem(_) | Opnd::Reg(_) | Opnd::UImm(_) | Opnd::Imm(_) => {
                            *opnd = asm.load(*opnd);
                        },
                        _ => {}
                    }
                    asm.push_insn(insn);
                },
                Insn::CSelZ { truthy, falsy, .. } |
                Insn::CSelNZ { truthy, falsy, .. } |
                Insn::CSelE { truthy, falsy, .. } |
                Insn::CSelNE { truthy, falsy, .. } |
                Insn::CSelL { truthy, falsy, .. } |
                Insn::CSelLE { truthy, falsy, .. } |
                Insn::CSelG { truthy, falsy, .. } |
                Insn::CSelGE { truthy, falsy, .. } => {
                    match *truthy {
                        // If we have an instruction output whose live range
                        // spans beyond this instruction, we have to load it.
                        Opnd::VReg { idx, .. } => {
                            if vreg_outlives_insn(idx) {
                                *truthy = asm.load(*truthy);
                            }
                        },
                        Opnd::UImm(_) | Opnd::Imm(_) => {
                            *truthy = asm.load(*truthy);
                        },
                        // Opnd::Value could have already been split
                        Opnd::Value(_) if !matches!(truthy, Opnd::VReg { .. }) => {
                            *truthy = asm.load(*truthy);
                        },
                        _ => {}
                    }

                    // Immediates on the falsy side are loaded into a register as well
                    match falsy {
                        Opnd::UImm(_) | Opnd::Imm(_) => {
                            *falsy = asm.load(*falsy);
                        },
                        _ => {}
                    }

                    asm.push_insn(insn);
                },
                Insn::Mov { dest, src } => {
                    // A move into memory is re-issued as a store; anything else stays a mov
                    if let Opnd::Mem(_) = dest {
                        asm.store(*dest, *src);
                    } else {
                        asm.mov(*dest, *src);
                    }
                },
                Insn::Not { opnd, .. } => {
                    match *opnd {
                        // If we have an instruction output whose live range
                        // spans beyond this instruction, we have to load it.
                        Opnd::VReg { idx, .. } => {
                            if vreg_outlives_insn(idx) {
                                *opnd = asm.load(*opnd);
                            }
                        },
                        // We have to load memory and register operands to avoid
                        // corrupting them.
                        Opnd::Mem(_) | Opnd::Reg(_) => {
                            *opnd = asm.load(*opnd);
                        },
                        // Otherwise we can just reuse the existing operand.
                        _ => {},
                    };
                    asm.push_insn(insn);
                },
                Insn::CCall { opnds, .. } => {
                    assert!(opnds.len() <= C_ARG_OPNDS.len());

                    // Load each operand into the corresponding argument register.
                    if !opnds.is_empty() {
                        let mut args: Vec<(Opnd, Opnd)> = vec![];
                        for (idx, opnd) in opnds.iter_mut().enumerate() {
                            args.push((C_ARG_OPNDS[idx], *opnd));
                        }
                        asm.parallel_mov(args);
                    }

                    // Now we push the CCall without any arguments so that it
                    // just performs the call.
                    *opnds = vec![];
                    asm.push_insn(insn);
                },
                Insn::Lea { .. } => {
                    // Merge `lea` and `mov` into a single `lea` when possible
                    match (&insn, iterator.peek().map(|(_, insn)| insn)) {
                        (Insn::Lea { opnd, out }, Some(Insn::Mov { dest: Opnd::Reg(reg), src }))
                        if matches!(out, Opnd::VReg { .. }) && out == src && live_ranges[out.vreg_idx()].end() == index + 1 => {
                            asm.push_insn(Insn::Lea { opnd: *opnd, out: Opnd::Reg(*reg) });
                            iterator.next(asm); // Pop merged Insn::Mov
                        }
                        _ => asm.push_insn(insn),
                    }
                },
                // Every other instruction is passed through untouched
                _ => {
                    asm.push_insn(insn);
                }
            }
        }

        asm_local
    }

    /// Split instructions using scratch registers. To maximize the use of the register pool
    /// for VRegs, most splits should happen in [`Self::x86_split`]. However, some instructions
    /// need to be split with registers after `alloc_regs`, e.g. for `compile_exits`, so
    /// this splits them and uses scratch registers for it.
    ///
    /// Consumes `self` and returns a new [`Assembler`] (with `accept_scratch_reg` set)
    /// holding the rewritten instructions. Instructions that need no rewriting are
    /// forwarded unchanged.
    ///
    /// # Panics
    ///
    /// Panics if an `IncrCounter` memory operand is not an `Opnd::UImm`, if a
    /// `ParallelMov` cannot be resolved even with a scratch register, or if a
    /// `Store` source is `Opnd::None` or `Opnd::VReg`.
    pub fn x86_scratch_split(mut self) -> Assembler {
        /// For some instructions, we want to be able to lower a 64-bit operand
        /// without requiring more registers to be available in the register
        /// allocator. So we just use the SCRATCH0_OPND register temporarily to hold
        /// the value before we immediately use it.
        fn split_64bit_immediate(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd) -> Opnd {
            match opnd {
                Opnd::Imm(value) => {
                    // 32-bit values will be sign-extended
                    if imm_num_bits(value) > 32 {
                        asm.mov(scratch_opnd, opnd);
                        scratch_opnd
                    } else {
                        opnd
                    }
                },
                Opnd::UImm(value) => {
                    // 32-bit values will be sign-extended
                    if imm_num_bits(value as i64) > 32 {
                        asm.mov(scratch_opnd, opnd);
                        scratch_opnd
                    } else {
                        Opnd::Imm(value as i64)
                    }
                },
                _ => opnd
            }
        }

        /// If a given operand is Opnd::Mem and it uses MemBase::Stack, lower it to MemBase::Reg using a scratch register.
        fn split_stack_membase(asm: &mut Assembler, opnd: Opnd, scratch_opnd: Opnd, stack_state: &StackState) -> Opnd {
            if let Opnd::Mem(Mem { base: stack_membase @ MemBase::Stack { .. }, disp, num_bits }) = opnd {
                let base = Opnd::Mem(stack_state.stack_membase_to_mem(stack_membase));
                asm.load_into(scratch_opnd, base);
                Opnd::Mem(Mem { base: MemBase::Reg(scratch_opnd.unwrap_reg().reg_no), disp, num_bits })
            } else {
                opnd
            }
        }

        /// If opnd is Opnd::Mem, set scratch_reg to *opnd. Return Some(Opnd::Mem) if it needs to be written back from scratch_reg.
        fn split_memory_write(opnd: &mut Opnd, scratch_opnd: Opnd) -> Option<Opnd> {
            if let Opnd::Mem(_) = opnd {
                // Opnd is Copy, so a plain dereference is enough to remember the original
                let mem_opnd = *opnd;
                *opnd = opnd.num_bits().map(|num_bits| scratch_opnd.with_num_bits(num_bits)).unwrap_or(scratch_opnd);
                Some(mem_opnd)
            } else {
                None
            }
        }

        /// If both opnd and other are Opnd::Mem, split opnd with scratch_opnd.
        fn split_if_both_memory(asm: &mut Assembler, opnd: Opnd, other: Opnd, scratch_opnd: Opnd) -> Opnd {
            if let (Opnd::Mem(_), Opnd::Mem(_)) = (opnd, other) {
                asm.load_into(scratch_opnd.with_num_bits(opnd.rm_num_bits()), opnd);
                scratch_opnd.with_num_bits(opnd.rm_num_bits())
            } else {
                opnd
            }
        }

        /// Move src to dst, splitting it with scratch_opnd if it's a Mem-to-Mem move. Skip it if dst == src.
        fn asm_mov(asm: &mut Assembler, dst: Opnd, src: Opnd, scratch_opnd: Opnd) {
            if dst != src {
                if let (Opnd::Mem(_), Opnd::Mem(_)) = (dst, src) {
                    asm.mov(scratch_opnd, src);
                    asm.mov(dst, scratch_opnd);
                } else {
                    asm.mov(dst, src);
                }
            }
        }

        // Prepare StackState to lower MemBase::Stack
        let stack_state = StackState::new(self.stack_base_idx);

        let mut asm_local = Assembler::new_with_asm(&self);
        let asm = &mut asm_local;
        asm.accept_scratch_reg = true;
        let mut iterator = self.instruction_iterator();

        while let Some((_, mut insn)) = iterator.next(asm) {
            match &mut insn {
                Insn::Add { left, right, out } |
                Insn::Sub { left, right, out } |
                Insn::And { left, right, out } |
                Insn::Or  { left, right, out } |
                Insn::Xor { left, right, out } => {
                    *left = split_stack_membase(asm, *left, SCRATCH0_OPND, &stack_state);
                    *left = split_if_both_memory(asm, *left, *right, SCRATCH0_OPND);
                    *right = split_stack_membase(asm, *right, SCRATCH1_OPND, &stack_state);
                    *right = split_64bit_immediate(asm, *right, SCRATCH1_OPND);

                    // The instruction is emitted first; the result is then moved
                    // from `left` into `out` (skipped when they are the same operand).
                    let (out, left) = (*out, *left);
                    asm.push_insn(insn);
                    asm_mov(asm, out, left, SCRATCH0_OPND);
                }
                Insn::Mul { left, right, out } => {
                    *left = split_stack_membase(asm, *left, SCRATCH0_OPND, &stack_state);
                    *left = split_if_both_memory(asm, *left, *right, SCRATCH0_OPND);
                    *right = split_stack_membase(asm, *right, SCRATCH1_OPND, &stack_state);
                    *right = split_64bit_immediate(asm, *right, SCRATCH1_OPND);

                    // imul doesn't have (Mem, Reg) encoding. Swap left and right in that case.
                    if let (Opnd::Mem(_), Opnd::Reg(_)) = (&left, &right) {
                        mem::swap(left, right);
                    }

                    let (out, left) = (*out, *left);
                    asm.push_insn(insn);
                    asm_mov(asm, out, left, SCRATCH0_OPND);
                }
                &mut Insn::Not { opnd, out } |
                &mut Insn::LShift { opnd, out, .. } |
                &mut Insn::RShift { opnd, out, .. } |
                &mut Insn::URShift { opnd, out, .. } => {
                    // Emit the instruction, then move the result from `opnd` into `out`
                    asm.push_insn(insn);
                    asm_mov(asm, out, opnd, SCRATCH0_OPND);
                }
                Insn::Test { left, right } |
                Insn::Cmp { left, right } => {
                    *left = split_stack_membase(asm, *left, SCRATCH1_OPND, &stack_state);
                    *right = split_stack_membase(asm, *right, SCRATCH0_OPND, &stack_state);
                    *right = split_if_both_memory(asm, *right, *left, SCRATCH0_OPND);

                    let num_bits = match right {
                        Opnd::Imm(value) => Some(imm_num_bits(*value)),
                        Opnd::UImm(value) => Some(uimm_num_bits(*value)),
                        _ => None
                    };

                    // If the immediate is less than 64 bits (like 32, 16, 8), and the operand
                    // sizes match, then we can represent it as an immediate in the instruction
                    // without moving it to a register first.
                    // IOW, 64 bit immediates must always be moved to a register
                    // before comparisons, where other sizes may be encoded
                    // directly in the instruction.
                    let use_imm = num_bits.is_some() && left.num_bits() == num_bits && num_bits.unwrap() < 64;
                    if !use_imm {
                        *right = split_64bit_immediate(asm, *right, SCRATCH0_OPND);
                    }
                    asm.push_insn(insn);
                }
                // For compile_exits, support splitting simple C arguments here
                Insn::CCall { opnds, .. } if !opnds.is_empty() => {
                    for (i, opnd) in opnds.iter().enumerate() {
                        asm.load_into(C_ARG_OPNDS[i], *opnd);
                    }
                    *opnds = vec![];
                    asm.push_insn(insn);
                }
                Insn::CSelZ { truthy: left, falsy: right, out } |
                Insn::CSelNZ { truthy: left, falsy: right, out } |
                Insn::CSelE { truthy: left, falsy: right, out } |
                Insn::CSelNE { truthy: left, falsy: right, out } |
                Insn::CSelL { truthy: left, falsy: right, out } |
                Insn::CSelLE { truthy: left, falsy: right, out } |
                Insn::CSelG { truthy: left, falsy: right, out } |
                Insn::CSelGE { truthy: left, falsy: right, out } => {
                    *left = split_stack_membase(asm, *left, SCRATCH1_OPND, &stack_state);
                    *right = split_stack_membase(asm, *right, SCRATCH0_OPND, &stack_state);
                    *right = split_if_both_memory(asm, *right, *left, SCRATCH0_OPND);
                    // A memory output is computed in the scratch register and stored afterwards
                    let mem_out = split_memory_write(out, SCRATCH0_OPND);
                    asm.push_insn(insn);
                    if let Some(mem_out) = mem_out {
                        asm.store(mem_out, SCRATCH0_OPND);
                    }
                }
                Insn::Lea { opnd, out } => {
                    *opnd = split_stack_membase(asm, *opnd, SCRATCH0_OPND, &stack_state);
                    let mem_out = split_memory_write(out, SCRATCH0_OPND);
                    asm.push_insn(insn);
                    if let Some(mem_out) = mem_out {
                        asm.store(mem_out, SCRATCH0_OPND);
                    }
                }
                Insn::LeaJumpTarget { target, out } => {
                    if let Target::Label(_) = target {
                        // For labels, compute the address into the scratch register
                        // and then move it to the requested output.
                        asm.push_insn(Insn::LeaJumpTarget { out: SCRATCH0_OPND, target: target.clone() });
                        asm.mov(*out, SCRATCH0_OPND);
                    } else {
                        // Any other kind of target is not rewritten here. Forward the
                        // instruction unchanged so that it is not lost from the output.
                        asm.push_insn(insn);
                    }
                }
                Insn::Load { out, opnd } |
                Insn::LoadInto { dest: out, opnd } => {
                    *opnd = split_stack_membase(asm, *opnd, SCRATCH0_OPND, &stack_state);
                    let mem_out = split_memory_write(out, SCRATCH0_OPND);
                    asm.push_insn(insn);
                    if let Some(mem_out) = mem_out {
                        asm.store(mem_out, SCRATCH0_OPND.with_num_bits(mem_out.rm_num_bits()));
                    }
                }
                // Convert Opnd::const_ptr into Opnd::Mem. This split is done here to give
                // a register for compile_exits.
                &mut Insn::IncrCounter { mem, value } => {
                    assert!(matches!(mem, Opnd::UImm(_)));
                    asm.load_into(SCRATCH0_OPND, mem);
                    asm.incr_counter(Opnd::mem(64, SCRATCH0_OPND, 0), value);
                }
                &mut Insn::Mov { dest, src } => {
                    asm_mov(asm, dest, src, SCRATCH0_OPND);
                }
                // Resolve ParallelMov that couldn't be handled without a scratch register.
                Insn::ParallelMov { moves } => {
                    for (dst, src) in Self::resolve_parallel_moves(&moves, Some(SCRATCH0_OPND)).unwrap() {
                        asm_mov(asm, dst, src, SCRATCH0_OPND);
                    }
                }
                // Handle various operand combinations for spills on compile_exits.
                &mut Insn::Store { dest, src } => {
                    let num_bits = dest.rm_num_bits();
                    let dest = split_stack_membase(asm, dest, SCRATCH1_OPND, &stack_state);

                    let src = match src {
                        Opnd::Reg(_) => src,
                        Opnd::Mem(_) => {
                            asm.mov(SCRATCH0_OPND, src);
                            SCRATCH0_OPND
                        }
                        Opnd::Imm(imm) => {
                            // For 64 bit destinations, 32-bit values will be sign-extended
                            if num_bits == 64 && imm_num_bits(imm) > 32 {
                                asm.mov(SCRATCH0_OPND, src);
                                SCRATCH0_OPND
                            } else if uimm_num_bits(imm as u64) <= num_bits {
                                // If the bit string is short enough for the destination, use the unsigned representation.
                                // Note that 64-bit and negative values are ruled out.
                                Opnd::UImm(imm as u64)
                            } else {
                                src
                            }
                        }
                        Opnd::UImm(imm) => {
                            // For 64 bit destinations, 32-bit values will be sign-extended
                            if num_bits == 64 && imm_num_bits(imm as i64) > 32 {
                                asm.mov(SCRATCH0_OPND, src);
                                SCRATCH0_OPND
                            } else {
                                src.into()
                            }
                        }
                        Opnd::Value(_) => {
                            asm.load_into(SCRATCH0_OPND, src);
                            SCRATCH0_OPND
                        }
                        src @ (Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during x86_scratch_split: {src:?}"),
                    };
                    asm.store(dest, src);
                }
                &mut Insn::PatchPoint { ref target, invariant, payload } => {
                    split_patch_point(asm, target, invariant, payload);
                }
                // Everything else needs no scratch-register splitting
                _ => {
                    asm.push_insn(insn);
                }
            }
        }

        asm_local
    }

    /// Emit platform-specific machine code
    ///
    /// Walks `self.insns` in order and writes the x86-64 encoding of each
    /// instruction into `cb`.
    ///
    /// Returns `None` when `cb` reports dropped bytes after emission.
    /// Otherwise every buffered `Insn::PosMarker` callback is invoked with the
    /// position recorded for it, and the function returns the code addresses
    /// of the pointer immediates written for heap-object `Opnd::Value` loads.
    pub fn x86_emit(&mut self, cb: &mut CodeBlock) -> Option<Vec<CodePtr>> {
        /// Emit a conditional select into `out`, which must be a register.
        ///
        /// `cmov_fn` moves when the condition holds and `cmov_neg` moves when
        /// it does not. A memory `truthy` operand is used as the cmov source
        /// (with `falsy` loaded first); otherwise `truthy` is loaded first and
        /// `falsy` is conditionally moved over it with the negated condition.
        fn emit_csel(
            cb: &mut CodeBlock,
            truthy: Opnd,
            falsy: Opnd,
            out: Opnd,
            cmov_fn: fn(&mut CodeBlock, X86Opnd, X86Opnd),
            cmov_neg: fn(&mut CodeBlock, X86Opnd, X86Opnd)){

            // Assert that output is a register
            out.unwrap_reg();

            // If the truthy value is a memory operand
            if let Opnd::Mem(_) = truthy {
                if out != falsy {
                    mov(cb, out.into(), falsy.into());
                }

                cmov_fn(cb, out.into(), truthy.into());
            } else {
                if out != truthy {
                    mov(cb, out.into(), truthy.into());
                }

                cmov_neg(cb, out.into(), falsy.into());
            }
        }

        /// Load `value` into `dest_reg` and record where its pointer
        /// immediate lives in the generated code by pushing it to `gc_offsets`.
        fn emit_load_gc_value(cb: &mut CodeBlock, gc_offsets: &mut Vec<CodePtr>, dest_reg: X86Opnd, value: VALUE) {
            // Using movabs because mov might write value in 32 bits
            movabs(cb, dest_reg, value.0 as _);
            // The pointer immediate is encoded as the last part of the mov written out
            let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE);
            gc_offsets.push(ptr_offset);
        }

        // List of GC offsets
        let mut gc_offsets: Vec<CodePtr> = Vec::new();

        // Buffered list of PosMarker callbacks to fire if codegen is successful
        let mut pos_markers: Vec<(usize, CodePtr)> = vec![];

        // The write_pos for the last Insn::PatchPoint, if any
        let mut last_patch_pos: Option<usize> = None;

        // Install a panic hook to dump Assembler with insn_idx on dev builds
        let (_hook, mut hook_insn_idx) = AssemblerPanicHook::new(self, 0);

        // For each instruction
        let mut insn_idx: usize = 0;
        while let Some(insn) = self.insns.get(insn_idx) {
            // Update insn_idx that is shown on panic
            if let Some(idx) = hook_insn_idx.as_mut() {
                *idx.lock().unwrap() = insn_idx;
            }

            match insn {
                Insn::Comment(text) => {
                    cb.add_comment(text);
                },

                // Write the label at the current position
                Insn::Label(target) => {
                    cb.write_label(target.unwrap_label());
                },

                // Report back the current position in the generated code
                Insn::PosMarker(..) => {
                    pos_markers.push((insn_idx, cb.get_write_ptr()));
                },

                Insn::BakeString(text) => {
                    for byte in text.as_bytes() {
                        cb.write_byte(*byte);
                    }

                    // Add a null-terminator byte for safety (in case we pass
                    // this to C code)
                    cb.write_byte(0);
                },

                // Set up RBP as the frame pointer so that unwinding works
                // (e.g. with Linux `perf record --call-graph fp`)
                // and to allow pushes and pops in the function.
                &Insn::FrameSetup { preserved, mut slot_count } => {
                    // Bump slot_count for alignment if necessary
                    const { assert!(SIZEOF_VALUE == 8, "alignment logic relies on SIZEOF_VALUE == 8"); }
                    let total_slots = 2 /* rbp and return address*/ + slot_count + preserved.len();
                    if total_slots % 2 == 1 {
                        slot_count += 1;
                    }
                    push(cb, RBP);
                    mov(cb, RBP, RSP);
                    for reg in preserved {
                        push(cb, reg.into());
                    }
                    if slot_count > 0 {
                        sub(cb, RSP, uimm_opnd((slot_count * SIZEOF_VALUE) as u64));
                    }
                }
                &Insn::FrameTeardown { preserved } => {
                    // FrameSetup pushed the preserved registers right below the
                    // saved RBP, so reload them from RBP-8, RBP-16, ...
                    let mut preserved_offset = -8;
                    for reg in preserved {
                        mov(cb, reg.into(), mem_opnd(64, RBP, preserved_offset));
                        preserved_offset -= 8;
                    }
                    mov(cb, RSP, RBP);
                    pop(cb, RBP);
                }

                Insn::Add { left, right, .. } => {
                    add(cb, left.into(), right.into());
                },

                Insn::Sub { left, right, .. } => {
                    sub(cb, left.into(), right.into());
                },

                Insn::Mul { left, right, .. } => {
                    imul(cb, left.into(), right.into());
                },

                Insn::And { left, right, .. } => {
                    and(cb, left.into(), right.into());
                },

                Insn::Or { left, right, .. } => {
                    or(cb, left.into(), right.into());
                },

                Insn::Xor { left, right, .. } => {
                    xor(cb, left.into(), right.into());
                },

                Insn::Not { opnd, .. } => {
                    not(cb, opnd.into());
                },

                Insn::LShift { opnd, shift , ..} => {
                    shl(cb, opnd.into(), shift.into())
                },

                Insn::RShift { opnd, shift , ..} => {
                    sar(cb, opnd.into(), shift.into())
                },

                Insn::URShift { opnd, shift, .. } => {
                    shr(cb, opnd.into(), shift.into())
                },

                // This assumes only load instructions can contain references to GC'd Value operands
                Insn::Load { opnd, out } |
                Insn::LoadInto { dest: out, opnd } => {
                    match opnd {
                        Opnd::Value(val) if val.heap_object_p() => {
                            emit_load_gc_value(cb, &mut gc_offsets, out.into(), *val);
                        }
                        _ => mov(cb, out.into(), opnd.into())
                    }
                },

                Insn::LoadSExt { opnd, out } => {
                    movsx(cb, out.into(), opnd.into());
                },

                Insn::ParallelMov { .. } => unreachable!("{insn:?} should have been lowered at alloc_regs()"),

                Insn::Store { dest, src } |
                Insn::Mov { dest, src } => {
                    mov(cb, dest.into(), src.into());
                },

                // Load effective address
                Insn::Lea { opnd, out } => {
                    lea(cb, out.into(), opnd.into());
                },

                // Load address of jump target
                Insn::LeaJumpTarget { target, out } => {
                    if let Target::Label(label) = target {
                        let out = *out;
                        // Set output to the raw address of the label
                        cb.label_ref(*label, 7, move |cb, src_addr, dst_addr| {
                            let disp = dst_addr - src_addr;
                            lea(cb, out.into(), mem_opnd(8, RIP, disp.try_into().unwrap()));
                        });
                    } else {
                        // Set output to the jump target's raw address
                        let target_code = target.unwrap_code_ptr();
                        let target_addr = target_code.raw_addr(cb).as_u64();
                        // Constant encoded length important for patching
                        movabs(cb, out.into(), target_addr);
                    }
                },

                // Push and pop to/from the C stack
                Insn::CPush(opnd) => {
                    push(cb, opnd.into());
                },
                Insn::CPop { out } => {
                    pop(cb, out.into());
                },
                Insn::CPopInto(opnd) => {
                    pop(cb, opnd.into());
                },

                // Push to the C stack all caller-save registers and the flags
                Insn::CPushAll => {
                    let regs = Assembler::get_caller_save_regs();

                    for reg in regs {
                        push(cb, X86Opnd::Reg(reg));
                    }
                    pushfq(cb);
                },
                // Pop the flags and all caller-save registers, in the reverse
                // order of CPushAll
                Insn::CPopAll => {
                    let regs = Assembler::get_caller_save_regs();

                    popfq(cb);
                    for reg in regs.into_iter().rev() {
                        pop(cb, X86Opnd::Reg(reg));
                    }
                },

                // C function call
                Insn::CCall { fptr, .. } => {
                    match fptr {
                        Opnd::UImm(fptr) => {
                            call_ptr(cb, RAX, *fptr as *const u8);
                        }
                        Opnd::Reg(_) => {
                            call(cb, fptr.into());
                        }
                        _ => unreachable!("unsupported ccall fptr: {fptr:?}")
                    }
                },

                Insn::CRet(opnd) => {
                    // TODO: bias allocation towards return register
                    if *opnd != Opnd::Reg(C_RET_REG) {
                        mov(cb, RAX, opnd.into());
                    }

                    ret(cb);
                },

                // Compare
                Insn::Cmp { left, right } => {
                    cmp(cb, left.into(), right.into());
                }

                // Test and set flags
                Insn::Test { left, right } => {
                    test(cb, left.into(), right.into());
                }

                // Jump to the address held in an operand
                Insn::JmpOpnd(opnd) => {
                    jmp_rm(cb, opnd.into());
                }

                // Unconditional jump to a code pointer or label
                Insn::Jmp(target) => {
                    match *target {
                        Target::CodePtr(code_ptr) => jmp_ptr(cb, code_ptr),
                        Target::Label(label) => jmp_label(cb, label),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"),
                    }
                }

                // Conditional jumps to a code pointer or label
                Insn::Je(target) => {
                    match *target {
                        Target::CodePtr(code_ptr) => je_ptr(cb, code_ptr),
                        Target::Label(label) => je_label(cb, label),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"),
                    }
                }

                Insn::Jne(target) => {
                    match *target {
                        Target::CodePtr(code_ptr) => jne_ptr(cb, code_ptr),
                        Target::Label(label) => jne_label(cb, label),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"),
                    }
                }

                Insn::Jl(target) => {
                    match *target {
                        Target::CodePtr(code_ptr) => jl_ptr(cb, code_ptr),
                        Target::Label(label) => jl_label(cb, label),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"),
                    }
                },

                Insn::Jg(target) => {
                    match *target {
                        Target::CodePtr(code_ptr) => jg_ptr(cb, code_ptr),
                        Target::Label(label) => jg_label(cb, label),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"),
                    }
                },

                Insn::Jge(target) => {
                    match *target {
                        Target::CodePtr(code_ptr) => jge_ptr(cb, code_ptr),
                        Target::Label(label) => jge_label(cb, label),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"),
                    }
                },

                Insn::Jbe(target) => {
                    match *target {
                        Target::CodePtr(code_ptr) => jbe_ptr(cb, code_ptr),
                        Target::Label(label) => jbe_label(cb, label),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"),
                    }
                },

                Insn::Jb(target) => {
                    match *target {
                        Target::CodePtr(code_ptr) => jb_ptr(cb, code_ptr),
                        Target::Label(label) => jb_label(cb, label),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"),
                    }
                },

                Insn::Jz(target) => {
                    match *target {
                        Target::CodePtr(code_ptr) => jz_ptr(cb, code_ptr),
                        Target::Label(label) => jz_label(cb, label),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"),
                    }
                }

                Insn::Jnz(target) => {
                    match *target {
                        Target::CodePtr(code_ptr) => jnz_ptr(cb, code_ptr),
                        Target::Label(label) => jnz_label(cb, label),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"),
                    }
                }

                // Both overflow variants are emitted as a plain `jo` here
                Insn::Jo(target) |
                Insn::JoMul(target) => {
                    match *target {
                        Target::CodePtr(code_ptr) => jo_ptr(cb, code_ptr),
                        Target::Label(label) => jo_label(cb, label),
                        Target::SideExit { .. } => unreachable!("Target::SideExit should have been compiled by compile_exits"),
                    }
                }

                Insn::Joz(..) | Insn::Jonz(..) => unreachable!("Joz/Jonz should be unused for now"),

                Insn::PatchPoint { .. } => unreachable!("PatchPoint should have been lowered to PadPatchPoint in x86_scratch_split"),
                Insn::PadPatchPoint => {
                    // If patch points are too close to each other or the end of the block, fill nop instructions
                    if let Some(last_patch_pos) = last_patch_pos {
                        let code_size = cb.get_write_pos().saturating_sub(last_patch_pos);
                        if code_size < cb.jmp_ptr_bytes() {
                            nop(cb, (cb.jmp_ptr_bytes() - code_size) as u32);
                        }
                    }
                    last_patch_pos = Some(cb.get_write_pos());
                },

                // Atomically increment a counter at a given memory location
                Insn::IncrCounter { mem, value } => {
                    assert!(matches!(mem, Opnd::Mem(_)));
                    assert!(matches!(value, Opnd::UImm(_) | Opnd::Imm(_) ) );
                    write_lock_prefix(cb);
                    add(cb, mem.into(), value.into());
                },

                Insn::Breakpoint => int3(cb),

                // Conditional selects: pass the cmov for the condition and the
                // cmov for its negation
                Insn::CSelZ { truthy, falsy, out } => {
                    emit_csel(cb, *truthy, *falsy, *out, cmovz, cmovnz);
                },
                Insn::CSelNZ { truthy, falsy, out } => {
                    emit_csel(cb, *truthy, *falsy, *out, cmovnz, cmovz);
                },
                Insn::CSelE { truthy, falsy, out } => {
                    emit_csel(cb, *truthy, *falsy, *out, cmove, cmovne);
                },
                Insn::CSelNE { truthy, falsy, out } => {
                    emit_csel(cb, *truthy, *falsy, *out, cmovne, cmove);
                },
                Insn::CSelL { truthy, falsy, out } => {
                    emit_csel(cb, *truthy, *falsy, *out, cmovl, cmovge);
                },
                Insn::CSelLE { truthy, falsy, out } => {
                    emit_csel(cb, *truthy, *falsy, *out, cmovle, cmovg);
                },
                Insn::CSelG { truthy, falsy, out } => {
                    emit_csel(cb, *truthy, *falsy, *out, cmovg, cmovle);
                },
                Insn::CSelGE { truthy, falsy, out } => {
                    emit_csel(cb, *truthy, *falsy, *out, cmovge, cmovl);
                }
                Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
            };

            insn_idx += 1;
        }

        // Error if we couldn't write out everything
        if cb.has_dropped_bytes() {
            None
        } else {
            // No bytes dropped, so the pos markers point to valid code
            for (insn_idx, pos) in pos_markers {
                if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() {
                    callback(pos, cb);
                } else {
                    panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}");
                }
            }

            Some(gc_offsets)
        }
    }

    /// Lower, register-allocate and emit the stored instructions into `cb`.
    ///
    /// On success, returns the address where the emitted code starts together
    /// with the GC offsets reported by `x86_emit`. Fails with
    /// `CompileError::OutOfMemory` when emission could not write everything,
    /// or with whatever error `alloc_regs` returns.
    pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>) -> Result<(CodePtr, Vec<CodePtr>), CompileError> {
        // Scratch registers are only available to the backend when the
        // assembler has not accepted them itself.
        let use_scratch_regs = !self.accept_scratch_reg;
        asm_dump!(self, init);

        let split_asm = self.x86_split();
        asm_dump!(split_asm, split);

        let mut asm = split_asm.alloc_regs(regs)?;
        asm_dump!(asm, alloc_regs);

        // Side exits are compiled after register allocation so that VRegs
        // spilled on side exits do not get their live ranges extended.
        asm.compile_exits();
        asm_dump!(asm, compile_exits);

        if use_scratch_regs {
            asm = asm.x86_scratch_split();
            asm_dump!(asm, scratch_split);
        }

        // Register every label with the code block; the code block must hand
        // back labels numbered in the same order as the assembler's.
        asm.label_names.iter().enumerate().for_each(|(idx, name)| {
            let label = cb.new_label(name.to_string());
            assert_eq!(label, Label(idx));
        });

        let start_ptr = cb.get_write_ptr();
        let emitted = asm.x86_emit(cb);
        let dropped_bytes = cb.has_dropped_bytes();

        match emitted {
            Some(gc_offsets) if !dropped_bytes => {
                cb.link_labels();
                Ok((start_ptr, gc_offsets))
            }
            _ => {
                cb.clear_labels();
                Err(CompileError::OutOfMemory)
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use insta::assert_snapshot;
    use crate::assert_disasm_snapshot;
    use crate::options::rb_zjit_prepare_options;
    use super::*;

    // ANSI SGR escape sequences: `ESC[1m` switches bold on and `ESC[22m`
    // switches back to normal intensity.
    const BOLD_BEGIN: &str = "\x1b[1m";
    const BOLD_END: &str = "\x1b[22m";

    /// Build a fresh assembler and a dummy code block for a test.
    fn setup_asm() -> (Assembler, CodeBlock) {
        // Options must be prepared first: asm.compile reads them via get_option!
        rb_zjit_prepare_options();

        let asm = Assembler::new();
        let cb = CodeBlock::new_dummy();
        (asm, cb)
    }

    /// Checks the textual rendering produced by `lir_string` for a small
    /// program covering labels, comments, frame setup/teardown, VReg outputs,
    /// a side-exit jump, a parallel move and 32-bit operand views.
    /// The assembler is only printed here, never compiled.
    #[test]
    fn test_lir_string() {
        use crate::hir::SideExitReason;

        let mut asm = Assembler::new();
        asm.stack_base_idx = 1;

        let label = asm.new_label("bb0");
        asm.write_label(label.clone());
        asm.push_insn(Insn::Comment("bb0(): foo@/tmp/a.rb:1".into()));
        asm.frame_setup(JIT_PRESERVED_REGS);

        let val64 = asm.add(CFP, Opnd::UImm(64));
        asm.store(Opnd::mem(64, SP, 0x10), val64);
        let side_exit = Target::SideExit {
            reason: SideExitReason::Interrupt,
            exit: SideExit {
                // A null PC is enough: the exit is only rendered, not compiled
                pc: Opnd::const_ptr(std::ptr::null::<u8>()),
                stack: vec![],
                locals: vec![],
            },
        };
        asm.push_insn(Insn::Joz(val64, side_exit));
        asm.parallel_mov(vec![(C_ARG_OPNDS[0], C_RET_OPND.with_num_bits(32)), (C_ARG_OPNDS[1], Opnd::mem(64, SP, -8))]);

        let val32 = asm.sub(Opnd::Value(Qtrue), Opnd::Imm(1));
        asm.store(Opnd::mem(64, EC, 0x10).with_num_bits(32), val32.with_num_bits(32));
        asm.je(label);
        asm.cret(val64);

        asm.frame_teardown(JIT_PRESERVED_REGS);
        assert_disasm_snapshot!(lir_string(&mut asm), @r"
        bb0:
          # bb0(): foo@/tmp/a.rb:1
          FrameSetup 1, r13, rbx, r12
          v0 = Add r13, 0x40
          Store [rbx + 0x10], v0
          Joz Exit(Interrupt), v0
          ParallelMov rdi <- eax, rsi <- [rbx - 8]
          v1 = Sub Value(0x14), Imm(1)
          Store Mem32[r12 + 0x10], VReg32(v1)
          Je bb0
          CRet v0
          FrameTeardown r13, rbx, r12
        ");
    }

    /// `add` with an immediate that fits in 32 bits: the recorded snapshot
    /// encodes the immediate directly in the instruction.
    /// Currently `#[ignore]`d; the reason is not recorded here.
    #[test]
    #[ignore]
    fn test_emit_add_lt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        let _ = asm.add(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: mov rax, rax
        0x3: add rax, 0xff
        ");
        assert_snapshot!(cb.hexdump(), @"4889c04881c0ff000000");
    }

    /// `add` with an immediate wider than 32 bits: the recorded snapshot
    /// first loads the immediate into r11 with `movabs`, then adds r11.
    /// Currently `#[ignore]`d; the reason is not recorded here.
    #[test]
    #[ignore]
    fn test_emit_add_gt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        let _ = asm.add(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: mov rax, rax
        0x3: movabs r11, 0xffffffffffff
        0xd: add rax, r11
        ");
        assert_snapshot!(cb.hexdump(), @"4889c049bbffffffffffff00004c01d8");
    }

    /// `and` with an immediate that fits in 32 bits: the recorded snapshot
    /// encodes the immediate directly in the instruction.
    /// Currently `#[ignore]`d; the reason is not recorded here.
    #[test]
    #[ignore]
    fn test_emit_and_lt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        let _ = asm.and(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: mov rax, rax
        0x3: and rax, 0xff
        ");
        assert_snapshot!(cb.hexdump(), @"4889c04881e0ff000000");
    }

    /// `and` with an immediate wider than 32 bits: the recorded snapshot
    /// first loads the immediate into r11 with `movabs`, then ands with r11.
    /// Currently `#[ignore]`d; the reason is not recorded here.
    #[test]
    #[ignore]
    fn test_emit_and_gt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        let _ = asm.and(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: mov rax, rax
        0x3: movabs r11, 0xffffffffffff
        0xd: and rax, r11
        ");
        assert_snapshot!(cb.hexdump(), @"4889c049bbffffffffffff00004c21d8");
    }

    /// `cmp` with an immediate that fits in 32 bits is emitted as a single
    /// instruction with the immediate encoded inline.
    #[test]
    fn test_emit_cmp_lt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        asm.cmp(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
        asm.compile_with_num_regs(&mut cb, 0);

        assert_disasm_snapshot!(cb.disasm(), @"  0x0: cmp rax, 0xff");
        assert_snapshot!(cb.hexdump(), @"4881f8ff000000");
    }

    /// `cmp` with an immediate wider than 32 bits: the immediate is loaded
    /// into r11 with `movabs` and the comparison is done against r11.
    #[test]
    fn test_emit_cmp_gt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        asm.cmp(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
        asm.compile_with_num_regs(&mut cb, 0);

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: movabs r11, 0xffffffffffff
        0xa: cmp rax, r11
        ");
        assert_snapshot!(cb.hexdump(), @"49bbffffffffffff00004c39d8");
    }

    /// `cmp` with the all-ones 64-bit unsigned immediate is emitted as a
    /// comparison against the immediate -1, with no scratch register involved.
    #[test]
    fn test_emit_cmp_64_bits() {
        let (mut asm, mut cb) = setup_asm();

        asm.cmp(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF_FFFF));
        asm.compile_with_num_regs(&mut cb, 0);

        assert_disasm_snapshot!(cb.disasm(), @"  0x0: cmp rax, -1");
        assert_snapshot!(cb.hexdump(), @"4883f8ff");
    }

    /// A 16-bit memory operand is compared directly against an immediate that
    /// needs all 16 bits (`cmp word ptr [...], 0xf000`).
    #[test]
    fn test_emit_cmp_mem_16_bits_with_imm_16() {
        let (mut asm, mut cb) = setup_asm();

        let shape_opnd = Opnd::mem(16, Opnd::Reg(RAX_REG), 6);

        asm.cmp(shape_opnd, Opnd::UImm(0xF000));
        asm.compile_with_num_regs(&mut cb, 0);

        assert_disasm_snapshot!(cb.disasm(), @"  0x0: cmp word ptr [rax + 6], 0xf000");
        assert_snapshot!(cb.hexdump(), @"6681780600f0");
    }

    /// A 32-bit memory operand is compared directly against an immediate that
    /// needs all 32 bits (`cmp dword ptr [...], 0xf0000000`).
    #[test]
    fn test_emit_cmp_mem_32_bits_with_imm_32() {
        let (mut asm, mut cb) = setup_asm();

        let shape_opnd = Opnd::mem(32, Opnd::Reg(RAX_REG), 4);

        asm.cmp(shape_opnd, Opnd::UImm(0xF000_0000));
        asm.compile_with_num_regs(&mut cb, 0);

        assert_disasm_snapshot!(cb.disasm(), @"  0x0: cmp dword ptr [rax + 4], 0xf0000000");
        assert_snapshot!(cb.hexdump(), @"817804000000f0");
    }

    /// `or` with an immediate that fits in 32 bits: the recorded snapshot
    /// encodes the immediate directly in the instruction.
    /// Currently `#[ignore]`d; the reason is not recorded here.
    #[test]
    #[ignore]
    fn test_emit_or_lt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        let _ = asm.or(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: mov rax, rax
        0x3: or rax, 0xff
        ");
        assert_snapshot!(cb.hexdump(), @"4889c04881c8ff000000");
    }

    /// `or` with an immediate wider than 32 bits: the recorded snapshot
    /// first loads the immediate into r11 with `movabs`, then ors with r11.
    /// Currently `#[ignore]`d; the reason is not recorded here.
    #[test]
    #[ignore]
    fn test_emit_or_gt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        let _ = asm.or(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: mov rax, rax
        0x3: movabs r11, 0xffffffffffff
        0xd: or rax, r11
        ");
        assert_snapshot!(cb.hexdump(), @"4889c049bbffffffffffff00004c09d8");
    }

    /// `sub` with an immediate that fits in 32 bits: the recorded snapshot
    /// encodes the immediate directly in the instruction.
    /// Currently `#[ignore]`d; the reason is not recorded here.
    #[test]
    #[ignore]
    fn test_emit_sub_lt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        let _ = asm.sub(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: mov rax, rax
        0x3: sub rax, 0xff
        ");
        assert_snapshot!(cb.hexdump(), @"4889c04881e8ff000000");
    }

    /// `sub` with an immediate wider than 32 bits: the recorded snapshot
    /// first loads the immediate into r11 with `movabs`, then subtracts r11.
    /// Currently `#[ignore]`d; the reason is not recorded here.
    #[test]
    #[ignore]
    fn test_emit_sub_gt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        let _ = asm.sub(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: mov rax, rax
        0x3: movabs r11, 0xffffffffffff
        0xd: sub rax, r11
        ");
        assert_snapshot!(cb.hexdump(), @"4889c049bbffffffffffff00004c29d8");
    }

    /// `test` with an immediate that fits in 32 bits is emitted as a single
    /// instruction with the immediate encoded inline.
    #[test]
    fn test_emit_test_lt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        asm.test(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
        asm.compile_with_num_regs(&mut cb, 0);

        assert_disasm_snapshot!(cb.disasm(), @"  0x0: test rax, 0xff");
        assert_snapshot!(cb.hexdump(), @"48f7c0ff000000");
    }

    /// `test` with an immediate wider than 32 bits: the immediate is loaded
    /// into r11 with `movabs` and the test is done against r11.
    #[test]
    fn test_emit_test_gt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        asm.test(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
        asm.compile_with_num_regs(&mut cb, 0);

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: movabs r11, 0xffffffffffff
        0xa: test rax, r11
        ");
        assert_snapshot!(cb.hexdump(), @"49bbffffffffffff00004c85d8");
    }

    /// `xor` with an immediate that fits in 32 bits: the recorded snapshot
    /// encodes the immediate directly in the instruction.
    /// Currently `#[ignore]`d; the reason is not recorded here.
    #[test]
    #[ignore]
    fn test_emit_xor_lt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        let _ = asm.xor(Opnd::Reg(RAX_REG), Opnd::UImm(0xFF));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: mov rax, rax
        0x3: xor rax, 0xff
        ");
        assert_snapshot!(cb.hexdump(), @"4889c04881f0ff000000");
    }

    /// `xor` with an immediate wider than 32 bits: the recorded snapshot
    /// first loads the immediate into r11 with `movabs`, then xors with r11.
    /// Currently `#[ignore]`d; the reason is not recorded here.
    #[test]
    #[ignore]
    fn test_emit_xor_gt_32_bits() {
        let (mut asm, mut cb) = setup_asm();

        let _ = asm.xor(Opnd::Reg(RAX_REG), Opnd::UImm(0xFFFF_FFFF_FFFF));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: mov rax, rax
        0x3: movabs r11, 0xffffffffffff
        0xd: xor rax, r11
        ");
        assert_snapshot!(cb.hexdump(), @"4889c049bbffffffffffff00004c31d8");
    }

    /// A `lea` whose result is immediately moved into a register collapses
    /// into a single `lea` that writes that register.
    #[test]
    fn test_merge_lea_reg() {
        let (mut asm, mut cb) = setup_asm();

        let sp = asm.lea(Opnd::mem(64, SP, 8));
        asm.mov(SP, sp); // should be merged to lea
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @"  0x0: lea rbx, [rbx + 8]");
        assert_snapshot!(cb.hexdump(), @"488d5b08");
    }

    /// A `lea` whose result is stored to memory must stay a separate `lea`
    /// followed by a store.
    /// Currently `#[ignore]`d; the reason is not recorded here.
    ///
    /// NOTE(review): the snapshot and hexdump below are byte-identical to the
    /// ones in `test_emit_cmp_gt_32_bits` and contain neither a `lea` nor a
    /// store, so they look copy-pasted. Regenerate them before un-ignoring.
    #[test]
    #[ignore]
    fn test_merge_lea_mem() {
        let (mut asm, mut cb) = setup_asm();

        let sp = asm.lea(Opnd::mem(64, SP, 8));
        asm.mov(Opnd::mem(64, SP, 0), sp); // should NOT be merged to lea
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: movabs r11, 0xffffffffffff
        0xa: cmp rax, r11
        ");
        assert_snapshot!(cb.hexdump(), @"49bbffffffffffff00004c39d8");
    }

    /// Comparing a loaded value against 0: the recorded snapshot uses
    /// `test rax, rax` in place of a `cmp` with 0, then selects between Qtrue
    /// and Qfalse with a conditional move.
    /// Currently `#[ignore]`d; the reason is not recorded here.
    #[test]
    #[ignore]
    fn test_replace_cmp_0() {
        let (mut asm, mut cb) = setup_asm();

        let val = asm.load(Opnd::mem(64, SP, 8));
        asm.cmp(val, 0.into());
        let result = asm.csel_e(Qtrue.into(), Qfalse.into());
        asm.mov(Opnd::Reg(RAX_REG), result);
        asm.compile_with_num_regs(&mut cb, 2);

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: mov rax, qword ptr [rbx + 8]
        0x4: test rax, rax
        0x7: mov eax, 0x14
        0xc: mov ecx, 0
        0x11: cmovne rax, rcx
        0x15: mov rax, rax
        ");
        assert_snapshot!(cb.hexdump(), @"488b43084885c0b814000000b900000000480f45c14889c0");
    }

    /// An `add` whose result is moved back into its left operand collapses
    /// into a single in-place `add`.
    #[test]
    fn test_merge_add_mov() {
        let (mut asm, mut cb) = setup_asm();

        let sp = asm.add(CFP, Opnd::UImm(0x40));
        asm.mov(CFP, sp); // should be merged to add
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @"  0x0: add r13, 0x40");
        assert_snapshot!(cb.hexdump(), @"4983c540");
    }

    /// `add_into` emits a single in-place `add` on the destination register.
    #[test]
    fn test_add_into() {
        let (mut asm, mut cb) = setup_asm();

        asm.add_into(CFP, Opnd::UImm(0x40));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @"  0x0: add r13, 0x40");
        assert_snapshot!(cb.hexdump(), @"4983c540");
    }

    /// A `sub` whose result is moved back into its left operand collapses
    /// into a single in-place `sub`.
    #[test]
    fn test_merge_sub_mov() {
        let (mut asm, mut cb) = setup_asm();

        let sp = asm.sub(CFP, Opnd::UImm(0x40));
        asm.mov(CFP, sp); // should be merged to sub
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @"  0x0: sub r13, 0x40");
        assert_snapshot!(cb.hexdump(), @"4983ed40");
    }

    /// `sub_into` emits a single in-place `sub` on the destination register.
    #[test]
    fn test_sub_into() {
        let (mut asm, mut cb) = setup_asm();

        asm.sub_into(CFP, Opnd::UImm(0x40));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @"  0x0: sub r13, 0x40");
        assert_snapshot!(cb.hexdump(), @"4983ed40");
    }

    /// An `and` whose result is moved back into its left operand collapses
    /// into a single in-place `and`.
    #[test]
    fn test_merge_and_mov() {
        let (mut asm, mut cb) = setup_asm();

        let sp = asm.and(CFP, Opnd::UImm(0x40));
        asm.mov(CFP, sp); // should be merged to and
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @"  0x0: and r13, 0x40");
        assert_snapshot!(cb.hexdump(), @"4983e540");
    }

    /// An `or` whose result is moved back into its left operand collapses
    /// into a single in-place `or`.
    #[test]
    fn test_merge_or_mov() {
        let (mut asm, mut cb) = setup_asm();

        let sp = asm.or(CFP, Opnd::UImm(0x40));
        asm.mov(CFP, sp); // should be merged to or
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @"  0x0: or r13, 0x40");
        assert_snapshot!(cb.hexdump(), @"4983cd40");
    }

    /// An `xor` whose result is moved back into its left operand collapses
    /// into a single in-place `xor`.
    #[test]
    fn test_merge_xor_mov() {
        let (mut asm, mut cb) = setup_asm();

        let sp = asm.xor(CFP, Opnd::UImm(0x40));
        asm.mov(CFP, sp); // should be merged to xor
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @"  0x0: xor r13, 0x40");
        assert_snapshot!(cb.hexdump(), @"4983f540");
    }

    /// Arguments that already sit in their own C argument registers need no
    /// moves at all; only the call sequence is emitted.
    #[test]
    fn test_ccall_resolve_parallel_moves_no_cycle() {
        // setup_asm() already prepares the options
        let (mut asm, mut cb) = setup_asm();

        asm.ccall(0 as _, vec![
            C_ARG_OPNDS[0], // mov rdi, rdi (optimized away)
            C_ARG_OPNDS[1], // mov rsi, rsi (optimized away)
        ]);
        asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len());

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: mov eax, 0
        0x5: call rax
        ");
        assert_snapshot!(cb.hexdump(), @"b800000000ffd0");
    }

    /// Two argument registers that must swap values form one cycle, which is
    /// broken by routing one value through r11.
    #[test]
    fn test_ccall_resolve_parallel_moves_single_cycle() {
        // setup_asm() already prepares the options
        let (mut asm, mut cb) = setup_asm();

        // rdi and rsi form a cycle
        asm.ccall(0 as _, vec![
            C_ARG_OPNDS[1], // mov rdi, rsi
            C_ARG_OPNDS[0], // mov rsi, rdi
            C_ARG_OPNDS[2], // mov rdx, rdx (optimized away)
        ]);
        asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len());

        assert_disasm_snapshot!(cb.disasm(), @"
            0x0: mov r11, rsi
            0x3: mov rsi, rdi
            0x6: mov rdi, r11
            0x9: mov eax, 0
            0xe: call rax
        ");
        assert_snapshot!(cb.hexdump(), @"4989f34889fe4c89dfb800000000ffd0");
    }

    /// Two independent swap cycles are each broken through r11, one after
    /// the other.
    #[test]
    fn test_ccall_resolve_parallel_moves_two_cycles() {
        // setup_asm() already prepares the options
        let (mut asm, mut cb) = setup_asm();

        // rdi and rsi form a cycle, and rdx and rcx form another cycle
        asm.ccall(0 as _, vec![
            C_ARG_OPNDS[1], // mov rdi, rsi
            C_ARG_OPNDS[0], // mov rsi, rdi
            C_ARG_OPNDS[3], // mov rdx, rcx
            C_ARG_OPNDS[2], // mov rcx, rdx
        ]);
        asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len());

        assert_disasm_snapshot!(cb.disasm(), @"
            0x0: mov r11, rsi
            0x3: mov rsi, rdi
            0x6: mov rdi, r11
            0x9: mov r11, rcx
            0xc: mov rcx, rdx
            0xf: mov rdx, r11
            0x12: mov eax, 0
            0x17: call rax
        ");
        assert_snapshot!(cb.hexdump(), @"4989f34889fe4c89df4989cb4889d14c89dab800000000ffd0");
    }

    // A three-register rotation (rdi <- rsi <- rdx <- rdi) in the argument
    // list; the expected code needs a single r11 save to break it.
    #[test]
    fn test_ccall_resolve_parallel_moves_large_cycle() {
        crate::options::rb_zjit_prepare_options();
        let (mut asm, mut cb) = setup_asm();

        let args = vec![
            C_ARG_OPNDS[1], // arg 0: rdi <- rsi
            C_ARG_OPNDS[2], // arg 1: rsi <- rdx
            C_ARG_OPNDS[0], // arg 2: rdx <- rdi
        ];
        asm.ccall(0 as _, args);

        let num_regs = ALLOC_REGS.len();
        asm.compile_with_num_regs(&mut cb, num_regs);

        assert_disasm_snapshot!(cb.disasm(), @"
            0x0: mov r11, rsi
            0x3: mov rsi, rdx
            0x6: mov rdx, rdi
            0x9: mov rdi, r11
            0xc: mov eax, 0
            0x11: call rax
        ");
        assert_snapshot!(cb.hexdump(), @"4989f34889d64889fa4c89dfb800000000ffd0");
    }

    // Same cycle-resolution scenario, but the arguments are outputs of earlier
    // loads rather than fixed registers. The local names record the register
    // each load lands in according to the snapshot. Marked #[ignore].
    #[test]
    #[ignore]
    fn test_ccall_resolve_parallel_moves_with_insn_out() {
        let (mut asm, mut cb) = setup_asm();

        let in_rax = asm.load(Opnd::UImm(1));
        let in_rcx = asm.load(Opnd::UImm(2));
        let in_rdx = asm.load(Opnd::UImm(3));

        // The last two arguments make rcx and rdx form a cycle.
        let args = vec![
            in_rax, // arg 0: rdi <- rax
            in_rcx, // arg 1: rsi <- rcx
            in_rcx, // arg 2: rdx <- rcx
            in_rdx, // arg 3: rcx <- rdx
        ];
        asm.ccall(0 as _, args);
        asm.compile_with_num_regs(&mut cb, 3);

        assert_disasm_snapshot!(cb.disasm(), @"
            0x0: mov eax, 1
            0x5: mov ecx, 2
            0xa: mov edx, 3
            0xf: mov rdi, rax
            0x12: mov rsi, rcx
            0x15: mov r11, rcx
            0x18: mov rcx, rdx
            0x1b: mov rdx, r11
            0x1e: mov eax, 0
            0x23: call rax
        ");
        assert_snapshot!(cb.hexdump(), @"b801000000b902000000ba030000004889c74889ce4989cb4889d14c89dab800000000ffd0");
    }

    // Conditional select whose "true" operand is a memory location: the
    // expected code loads Qnil into a register first and then uses a cmovg
    // with the memory operand as its source.
    #[test]
    fn test_cmov_mem() {
        let (mut asm, mut cb) = setup_asm();

        // Where the selected value is written back: [SP + 0].
        let stack_top = Opnd::mem(64, SP, 0);
        // The value compared against 1: [SP + 16].
        let len_slot = Opnd::mem(64, SP, 16);

        asm.cmp(len_slot, 1.into());

        // The memory operand selected when the comparison is "greater".
        let first_elem = Opnd::mem(64, SP, 0);
        let selected = asm.csel_g(first_elem, Qnil.into());
        asm.mov(stack_top, selected);

        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm_snapshot!(cb.disasm(), @"
            0x0: cmp qword ptr [rbx + 0x10], 1
            0x5: mov edi, 4
            0xa: cmovg rdi, qword ptr [rbx]
            0xe: mov qword ptr [rbx], rdi
        ");
        assert_snapshot!(cb.hexdump(), @"48837b1001bf04000000480f4f3b48893b");
    }

    // Conditional select between a 64-bit VALUE constant and Qnil; the
    // snapshot expects both to be materialized in registers before the cmov.
    // Marked #[ignore].
    #[test]
    #[ignore]
    fn test_csel_split() {
        let (mut asm, mut cb) = setup_asm();

        let dest = Opnd::mem(64, SP, 0);
        let wide_value = VALUE(0x7f22c88d1930);

        let selected = asm.csel_ne(wide_value.into(), Qnil.into());
        asm.mov(dest, selected);

        asm.compile_with_num_regs(&mut cb, 3);

        assert_disasm_snapshot!(cb.disasm(), @"
            0x0: movabs rax, 0x7f22c88d1930
            0xa: mov ecx, 4
            0xf: cmove rax, rcx
            0x13: mov qword ptr [rbx], rax
        ");
        assert_snapshot!(cb.hexdump(), @"48b830198dc8227f0000b904000000480f44c1488903");
    }

    // Storing 0x8000_0001 into a 32-bit memory operand should yield the same
    // `mov dword ptr, imm32` encoding whether the source is a UImm or an Imm.
    #[test]
    fn test_mov_m32_imm32() {
        let (mut asm, mut cb) = setup_asm();

        // 32-bit destination at [rax + 0].
        let dest = Opnd::mem(32, C_RET_OPND, 0);
        let unsigned_src = Opnd::UImm(0x8000_0001);
        let signed_src = Opnd::Imm(0x8000_0001);

        asm.mov(dest, unsigned_src);
        asm.mov(dest, signed_src);

        asm.compile_with_num_regs(&mut cb, 0);

        assert_disasm_snapshot!(cb.disasm(), @"
            0x0: mov dword ptr [rax], 0x80000001
            0x6: mov dword ptr [rax], 0x80000001
        ");
        assert_snapshot!(cb.hexdump(), @"c70001000080c70001000080");
    }

    // Frame setup followed by teardown with the JIT's preserved registers.
    // Per the snapshot, the registers are pushed after the frame pointer is
    // established and later reloaded from rbp-relative slots before the
    // frame is popped. The `sub rsp, 8` is presumably alignment padding.
    #[test]
    fn frame_setup_teardown_preserved_regs() {
        let (mut asm, mut cb) = setup_asm();

        // The same register list must be given to both halves.
        let preserved = JIT_PRESERVED_REGS;
        asm.frame_setup(preserved);
        asm.frame_teardown(preserved);
        asm.cret(C_RET_OPND);

        asm.compile_with_num_regs(&mut cb, 0);

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: push rbp
        0x1: mov rbp, rsp
        0x4: push r13
        0x6: push rbx
        0x7: push r12
        0x9: sub rsp, 8
        0xd: mov r13, qword ptr [rbp - 8]
        0x11: mov rbx, qword ptr [rbp - 0x10]
        0x15: mov r12, qword ptr [rbp - 0x18]
        0x19: mov rsp, rbp
        0x1c: pop rbp
        0x1d: ret
        ");
        assert_snapshot!(cb.hexdump(), @"554889e541555341544883ec084c8b6df8488b5df04c8b65e84889ec5dc3");
    }

    // Frame setup/teardown with no preserved registers but a non-zero
    // `stack_base_idx`. With a value of 5 the snapshot reserves 0x30 bytes,
    // presumably 5 slots rounded up to keep the stack aligned.
    #[test]
    fn frame_setup_teardown_stack_base_idx() {
        let (mut asm, mut cb) = setup_asm();

        let reserved_slots = 5;
        asm.stack_base_idx = reserved_slots;

        // Empty register lists: nothing to save or restore.
        asm.frame_setup(&[]);
        asm.frame_teardown(&[]);

        asm.compile_with_num_regs(&mut cb, 0);

        assert_disasm_snapshot!(cb.disasm(), @r"
        0x0: push rbp
        0x1: mov rbp, rsp
        0x4: sub rsp, 0x30
        0x8: mov rsp, rbp
        0xb: pop rbp
        ");
        assert_snapshot!(cb.hexdump(), @"554889e54883ec304889ec5d");
    }

    // Storing a heap-object VALUE straight to memory, running only the
    // scratch-split and emit steps. The VALUE must still be reported as a GC
    // offset, and the snapshot shows it going through r11.
    #[test]
    fn test_store_value_without_split() {
        let (mut asm, mut cb) = setup_asm();

        // Not a real object, but it has to look like one to the VALUE check.
        let fake_heap_obj = VALUE(0x1000);
        assert!(fake_heap_obj.heap_object_p());

        let dest = Opnd::mem(VALUE_BITS, SP, 0);
        asm.store(dest, fake_heap_obj.into());

        // Run the scratch-split pass by hand, then emit directly.
        asm = asm.x86_scratch_split();
        let reported_offsets = asm.x86_emit(&mut cb).unwrap();
        assert_eq!(1, reported_offsets.len(), "VALUE source operand should be reported as gc offset");

        assert_disasm_snapshot!(cb.disasm(), @"
            0x0: movabs r11, 0x1000
            0xa: mov qword ptr [rbx], r11
        ");
        assert_snapshot!(cb.hexdump(), @"49bb00100000000000004c891b");
    }

    // csel_e on two memory operands whose bases are themselves stack slots,
    // compiled with zero allocatable registers. The snapshot shows each base
    // being loaded into a scratch register (r10/r11) before it is dereferenced.
    #[test]
    fn test_csel_split_memory_read() {
        let (mut asm, mut cb) = setup_asm();

        let if_equal = Opnd::Mem(Mem {
            base: MemBase::Stack { stack_idx: 0, num_bits: 64 },
            disp: 0,
            num_bits: 64,
        });
        let otherwise = Opnd::Mem(Mem {
            base: MemBase::Stack { stack_idx: 1, num_bits: 64 },
            disp: 2,
            num_bits: 64,
        });

        // The result operand is not needed; only the emitted code matters.
        let _ = asm.csel_e(if_equal, otherwise);

        asm.compile_with_num_regs(&mut cb, 0);

        assert_disasm_snapshot!(cb.disasm(), @"
            0x0: mov r10, qword ptr [rbp - 8]
            0x4: mov r11, qword ptr [rbp - 0x10]
            0x8: mov r11, qword ptr [r11 + 2]
            0xc: cmove r11, qword ptr [r10]
            0x10: mov qword ptr [rbp - 8], r11
        ");
        assert_snapshot!(cb.hexdump(), @"4c8b55f84c8b5df04d8b5b024d0f441a4c895df8");
    }

    // lea of a memory operand whose base is a stack slot, compiled with zero
    // allocatable registers. The snapshot shows the base being loaded into
    // r11 first, then the lea, then the result written to the stack.
    #[test]
    fn test_lea_split_memory_read() {
        let (mut asm, mut cb) = setup_asm();

        let stack_based_mem = Opnd::Mem(Mem {
            base: MemBase::Stack { stack_idx: 0, num_bits: 64 },
            disp: 0,
            num_bits: 64,
        });

        // The result operand is not needed; only the emitted code matters.
        let _ = asm.lea(stack_based_mem);

        asm.compile_with_num_regs(&mut cb, 0);

        assert_disasm_snapshot!(cb.disasm(), @"
            0x0: mov r11, qword ptr [rbp - 8]
            0x4: lea r11, [r11]
            0x7: mov qword ptr [rbp - 8], r11
        ");
        assert_snapshot!(cb.hexdump(), @"4c8b5df84d8d1b4c895df8");
    }
}
